** Data reading and variable selection from raw data
** 22/06/2017
** Taiwan Social Change Survey 2004


** 01. Reading data **
* Start from a clean session: close any log left open by an earlier run
* (capture swallows the error when no log is open), empty memory, and
* switch off output paging.
capture log close
clear all
set more off
* Fill in both placeholders below before running the do-file.
cd /*insert your work directory here*/
use /*read your data here*/

** 02. Constructing year and country variables **
* Survey year is a constant for every observation in this file.
generate year = 2004
label variable year "survey year"

* Country is stored as a numeric code; Taiwan is 158 (see "ISO Country Codes.pdf").
generate country = 158
label variable country "ISO country code"


** 03. ID variables **

* Copy the raw identifier id into the harmonised name pid.
generate pid = id
label variable pid "person id"


** 04. Basic Demographics (Sex and Age/birth year) **

* Sex: copy of raw item v1, value-labelled 1 = male, 2 = female.
generate sex = v1
label variable sex "sex"
label define sex 1 "male" 2 "female"
label values sex sex

* Birth year: raw item v2_1 shifted by 1911.
* NOTE(review): presumably v2_1 is recorded in the ROC (Minguo) calendar and
* the shift converts it to a Gregorian year - confirm against the codebook.
generate birthyr = v2_1 + 1911
label variable birthyr "year of birth"

* Age is the survey year minus the birth year.
generate age = year - birthyr
label variable age "age"

** 05. Siblings **

* Brothers and sisters are each the sum of two raw counts. A missing
* component makes the sum missing.
generate nbro = v126_1 + v126_2
label variable nbro "number of brothers"

generate nsis = v126_3 + v126_4
label variable nsis "number of sisters"

* Total siblings = brothers + sisters.
generate nsibs = nbro + nsis
label variable nsibs "number of siblings"

* Birth order = v126_1 + v126_3 + 1.
* NOTE(review): presumably v126_1 and v126_3 count older brothers and older
* sisters - confirm against the codebook.
generate birthorder = v126_1 + v126_3 + 1
label variable birthorder "birth order"

** 06. Own education **

* Highest level of education obtained by the respondent (raw item v9a).
rename v9a educ
label variable educ "highest education attained"

** 07. Parents' education: Father and/or Mother **

* Highest education obtained by the father (raw item v13) and the mother
* (raw item v14).
rename v13 faeduc
label variable faeduc "father highest education obtained"

rename v14 moeduc
label variable moeduc "mother highest education obtained"

** 08. Own occupation **

* Employment status (raw item v121).
rename v121 empstat
label variable empstat "employment status"

* Industry and position of the respondent's occupation (raw items v122_a and
* v122_b1). Both use country-specific codes; the survey documentation gives
* no further detail on the coding.
* The labels are kept short and balanced: the previous wording exceeded
* Stata's 80-character limit for variable labels and left a parenthesis open.
rename v122_a occ_indus
label variable occ_indus "industry of occupation (country-specific coding)"

rename v122_b1 occ_posit
label variable occ_posit "position of occupation (country-specific coding)"


** 09. Parents' occupation **

* Father's occupational position (raw item v123_b), in country-specific
* codes; the survey documentation gives no further detail on the coding.
* The label names the father explicitly so it cannot be confused with the
* respondent's occ_posit, and it stays within Stata's 80-character limit
* for variable labels.
rename v123_b faocc_posit
label variable faocc_posit "father position of occupation (country-specific coding)"

** 10. Tabulate the Identified Variables **

* Everything between -log using- and -log close- is captured in a plain-text
* log; fill in the placeholder with the log file path before running.
log using /*insert your work directory here*/, replace text

** Data reading and variable selection from raw data
** 22/06/2017
** Taiwan Social Change Survey 2004

** Sex **
tabulate sex

** Age, Birth Year **
summarize age birthyr, detail

** Siblings **
summarize nsibs nbro nsis, detail

** R's Own Education **
tab1 educ

** Parental Education **
tab1 faeduc moeduc

** R's Own Occupation **
tab1 empstat occ_indus occ_posit

** Parental Occupation **
tab1 faocc_posit


log close

** 11. Keep the identified variables only

* Text after /// is ignored by Stata, so each continuation line carries a note.
keep year country pid              /// survey identifiers
     sex age birthyr               /// demographics
     nbro nsis nsibs birthorder    /// siblings
     educ faeduc moeduc            /// own and parental education
     empstat occ_indus occ_posit   /// own occupation
     faocc_posit


** 12. Save the Data File **

* Fill in the placeholder with the output file path before running.
saveold /*insert your work directory here*/, replace

** 13. Homogenising education **
** Own Education **
* The raw category variables get a _cat suffix. Note that the mother's
* variable changes prefix from mo- to ma- at this point.
rename educ   educ_cat
rename faeduc faeduc_cat
rename moeduc maeduc_cat

* Years of schooling assigned to categories 1-20; any other code stays missing.
generate educ_yrs = .
replace educ_yrs =  0 if inlist(educ_cat, 1, 2)
replace educ_yrs =  6 if educ_cat == 3
replace educ_yrs =  9 if inlist(educ_cat, 4, 5)
replace educ_yrs = 11 if educ_cat == 7
replace educ_yrs = 12 if inlist(educ_cat, 6, 8, 9)
replace educ_yrs = 13 if educ_cat == 13
replace educ_yrs = 14 if inlist(educ_cat, 10, 11, 14, 15, 16)
replace educ_yrs = 15 if inlist(educ_cat, 12, 17)
replace educ_yrs = 16 if educ_cat == 18
replace educ_yrs = 18 if educ_cat == 19
replace educ_yrs = 20 if educ_cat == 20
label variable educ_yrs "respondent highest education in years"

* ISCED code for each respondent education category.
* Codes are stored as plain numbers, so ISCED 020 is held as 20.
generate educ_ISCED = 20 if inlist(educ_cat, 1, 2)
replace educ_ISCED = 100 if educ_cat == 3
replace educ_ISCED = 200 if inlist(educ_cat, 4, 5)
replace educ_ISCED = 340 if educ_cat == 6
replace educ_ISCED = 350 if inlist(educ_cat, 7, 8)
replace educ_ISCED = 300 if educ_cat == 9
replace educ_ISCED = 400 if inlist(educ_cat, 10, 11, 12, 13, 14)
replace educ_ISCED = 500 if inlist(educ_cat, 15, 16, 17)
replace educ_ISCED = 600 if educ_cat == 18
replace educ_ISCED = 700 if educ_cat == 19
replace educ_ISCED = 800 if educ_cat == 20
* Categories 21, 97 and 98 are explicitly left without an ISCED code.
replace educ_ISCED = . if inlist(educ_cat, 21, 97, 98)
* The variable holds ISCED codes, not years, so the label says so.
label variable educ_ISCED "respondent highest education (ISCED code)"

** Parents Education **

* Father: years of schooling assigned to categories 1-20; any other code
* stays missing.
generate faeduc_yrs = .
replace faeduc_yrs =  0 if inlist(faeduc_cat, 1, 2)
replace faeduc_yrs =  6 if faeduc_cat == 3
replace faeduc_yrs =  9 if inlist(faeduc_cat, 4, 5)
replace faeduc_yrs = 11 if faeduc_cat == 7
replace faeduc_yrs = 12 if inlist(faeduc_cat, 6, 8, 9)
replace faeduc_yrs = 13 if faeduc_cat == 13
replace faeduc_yrs = 14 if inlist(faeduc_cat, 10, 11, 14, 15, 16)
replace faeduc_yrs = 15 if inlist(faeduc_cat, 12, 17)
replace faeduc_yrs = 16 if faeduc_cat == 18
replace faeduc_yrs = 18 if faeduc_cat == 19
replace faeduc_yrs = 20 if faeduc_cat == 20
label variable faeduc_yrs "father's highest education in years"

* Mother: years of schooling assigned to categories 1-20; any other code
* stays missing.
generate maeduc_yrs = .
replace maeduc_yrs =  0 if inlist(maeduc_cat, 1, 2)
replace maeduc_yrs =  6 if maeduc_cat == 3
replace maeduc_yrs =  9 if inlist(maeduc_cat, 4, 5)
replace maeduc_yrs = 11 if maeduc_cat == 7
replace maeduc_yrs = 12 if inlist(maeduc_cat, 6, 8, 9)
replace maeduc_yrs = 13 if maeduc_cat == 13
replace maeduc_yrs = 14 if inlist(maeduc_cat, 10, 11, 14, 15, 16)
replace maeduc_yrs = 15 if inlist(maeduc_cat, 12, 17)
replace maeduc_yrs = 16 if maeduc_cat == 18
replace maeduc_yrs = 18 if maeduc_cat == 19
replace maeduc_yrs = 20 if maeduc_cat == 20
label variable maeduc_yrs "mother's highest education in years"

* ISCED code for each father education category.
* Codes are stored as plain numbers, so ISCED 020 is held as 20.
generate faeduc_ISCED = 20 if inlist(faeduc_cat, 1, 2)
replace faeduc_ISCED = 100 if faeduc_cat == 3
replace faeduc_ISCED = 200 if inlist(faeduc_cat, 4, 5)
replace faeduc_ISCED = 340 if faeduc_cat == 6
replace faeduc_ISCED = 350 if inlist(faeduc_cat, 7, 8)
replace faeduc_ISCED = 300 if faeduc_cat == 9
* NOTE(review): category 23 is mapped only for the father and only here;
* faeduc_yrs has no rule for 23, so years stay missing for it. Confirm
* against the codebook whether 23 is a valid category.
replace faeduc_ISCED = 400 if inlist(faeduc_cat, 10, 11, 12, 13, 14, 23)
replace faeduc_ISCED = 500 if inlist(faeduc_cat, 15, 16, 17)
replace faeduc_ISCED = 600 if faeduc_cat == 18
replace faeduc_ISCED = 700 if faeduc_cat == 19
replace faeduc_ISCED = 800 if faeduc_cat == 20
* Categories 21, 97 and 98 are explicitly left without an ISCED code.
replace faeduc_ISCED = . if inlist(faeduc_cat, 21, 97, 98)
* The variable holds ISCED codes, not years, so the label says so.
label variable faeduc_ISCED "father highest education (ISCED code)"

* ISCED code for each mother education category.
* Codes are stored as plain numbers, so ISCED 020 is held as 20.
generate maeduc_ISCED = 20 if inlist(maeduc_cat, 1, 2)
replace maeduc_ISCED = 100 if maeduc_cat == 3
replace maeduc_ISCED = 200 if inlist(maeduc_cat, 4, 5)
replace maeduc_ISCED = 340 if maeduc_cat == 6
replace maeduc_ISCED = 350 if inlist(maeduc_cat, 7, 8)
replace maeduc_ISCED = 300 if maeduc_cat == 9
* Category 14 maps to 400, matching the respondent and father mappings
* (educ_ISCED, faeduc_ISCED); it was previously 500 for the mother only.
* NOTE(review): confirm against the codebook which value is intended.
replace maeduc_ISCED = 400 if inlist(maeduc_cat, 10, 11, 12, 13, 14)
replace maeduc_ISCED = 500 if inlist(maeduc_cat, 15, 16, 17)
replace maeduc_ISCED = 600 if maeduc_cat == 18
replace maeduc_ISCED = 700 if maeduc_cat == 19
replace maeduc_ISCED = 800 if maeduc_cat == 20
* Categories 21, 97 and 98 are explicitly left without an ISCED code.
replace maeduc_ISCED = . if inlist(maeduc_cat, 21, 97, 98)
* The variable holds ISCED codes, not years, so the label says so.
label variable maeduc_ISCED "mother highest education (ISCED code)"

** 14. Homogenising sibling **
* Cutoff flags: each flag is the constant 99, value-labelled "no cutoff".
foreach stem in nbro nsis nsibs {
    generate `stem'_flag = 99
}
label variable nbro_flag  "cutoff of number of brothers"
label variable nsis_flag  "cutoff of number of sisters"
label variable nsibs_flag "cutoff of total number of siblings"

* One shared value label serves all three flags.
label define nsib_flag 99 "no cutoff"
label values nbro_flag nsis_flag nsibs_flag nsib_flag



** 15. Tab Education and Sibling Variables **
* Each education variable is listed as category, years, ISCED code so the
* recodes can be checked side by side. The ISCED variables are included
* here; they were previously left out of the tabulation.
tab1 sex age birthyr
tab1 educ_cat educ_yrs educ_ISCED
tab1 faeduc_cat faeduc_yrs faeduc_ISCED
tab1 maeduc_cat maeduc_yrs maeduc_ISCED
tab1 nbro nsis nsibs nbro_flag nsis_flag nsibs_flag


** 16. Save the Data File **

* Fill in the placeholder with the output file path before running.
saveold /*insert your work directory here*/, replace

